Plot duplications in tree

Author

Claudia Zirión-Martínez

Published

February 13, 2025

Setup

Libraries

Code
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
library(svglite)
source("scripts/metadata_colors.R")

Paths

Code
metadata_path <- 
    "data/processed/metadata_ashton_desj_all_weavepop_final_H99.csv"
duplications_path <- 
    "results/tables/duplications.tsv"
chrom_metrics_path <-
    "results/tables/chromosome_cnv_categories.tsv"
merged_tree_path <- 
    "data/processed/tree_merged.newick"
tree_merged_duplications_path1 <- 
    "results/trees_dups/tree_duplications_full1.png"
tree_merged_duplications_path2 <- 
    "results/trees_dups/tree_duplications_full2.png"
tree_merged_duplications_path3 <- 
    "results/trees_dups/tree_duplications_full3.png"
tree_merged_duplications_path4 <- 
    "results/trees_dups/tree_duplications_full4.png"
tree_merged_duplications_path5 <- 
    "results/trees_dups/tree_duplications_full5.png"
tree_merged_duplications_path6 <- 
    "results/trees_dups/tree_duplications_full6.png"
tree_merged_duplications_small1 <-  
    "results/trees_dups/tree_duplications_small1.png"
tree_merged_duplications_small2 <-  
    "results/trees_dups/tree_duplications_small2.png"
tree_merged_duplications_small3 <-  
    "results/trees_dups/tree_duplications_small3.png"
tree_merged_duplications_small4 <-  
    "results/trees_dups/tree_duplications_small4.png"
tree_merged_duplications_small5 <-  
    "results/trees_dups/tree_duplications_small5.png"
tree_merged_duplications_small6 <-  
    "results/trees_dups/tree_duplications_small6.png"
tree_merged_duplications_small7 <-  
    "results/trees_dups/tree_duplications_small7.png"
tree_merged_duplications_small8 <-  
    "results/trees_dups/tree_duplications_small8.png"
tree_merged_duplications_small9 <-  
    "results/trees_dups/tree_duplications_small9.png"
tree_merged_duplications_small10 <-  
    "results/trees_dups/tree_duplications_small10.png"

Metadata

Load the necessary data

Code
metadata <- read.csv(
    metadata_path,
    header = TRUE)%>%
    select(strain, everything())

Get one dataframe for each variable to be plotted as a separate metadata column in the tree

Code
metadata$vni_subdivision <- factor(metadata$vni_subdivision,
                            levels = c("VNIa-4", "VNIa-5", "VNIa-32", 
                            "VNIa-93", "VNIa-X", "VNIa-Y", "VNIb", 
                            "VNIc", "VNIa-outlier"))
metadata$country_of_origin <- factor(metadata$country_of_origin,
                                levels = names(country_colors))
metadata$continent <- factor(metadata$continent,
                                levels = names(continent_colors))            

sublineage <- metadata %>%
                filter(lineage == "VNI")%>%
                select(strain, vni_subdivision)%>%
                column_to_rownames("strain")%>%
                droplevels()
lineage <- metadata %>%
            select(strain, lineage)%>%
            column_to_rownames("strain")
dataset <- metadata %>%
            select(strain, dataset)%>%
            column_to_rownames("strain")
source <- metadata %>%
            select(strain, source)%>%
            column_to_rownames("strain")
country <- metadata %>%
            select(strain, country_of_origin)%>%
            column_to_rownames("strain")     
continent <- metadata %>%
            select(strain, continent)%>%
            column_to_rownames("strain")  

Duplications

Code
duplications <- read.delim(
    duplications_path,
    sep = "\t", header = TRUE, stringsAsFactors = TRUE)
Code
duplications_full <- duplications %>%
    select(strain, chromosome) %>%
    distinct()

Make matrix of duplicated chromosomes

Code
dup_chroms <- duplications_full %>%
    select(strain, chromosome)%>%
    mutate(duplicated_full = 1)%>%
    arrange(chromosome)%>%
    pivot_wider(names_from = chromosome, values_from = duplicated_full, values_fill = 0)%>%
    column_to_rownames("strain")%>%
    mutate(across(everything(), ~ ifelse(. == 1, cur_column(),"Euploid")))

euploid_strain <- metadata %>%
    filter(!strain %in% duplications_full$strain)%>%
    select(strain)

for (chrom in colnames(dup_chroms)){
    euploid_strain[chrom] <- "Euploid"
}

dup_chroms <- euploid_strain %>%
    column_to_rownames("strain") %>%
    bind_rows(dup_chroms)
Code
n_dups <- duplications_full %>% 
    group_by(chromosome)%>%
    summarize(n = n ())
n_dups
chromosome n
chr01 2
chr04 1
chr06 2
chr09 6
chr12 12
chr13 12
chr14 3

CNV categories and metrics per chromosome

Code
chrom_metrics <- read.delim(
    chrom_metrics_path,
    header = TRUE,
    sep = "\t"
)
chrom_metrics$category <- factor(chrom_metrics$category, levels = c("absent", "small", "partial", "full"))

Make matrix of coverage percent per chromosome for duplications and deletions

Code
dup_coverage <- chrom_metrics %>%
    filter(cnv == "duplication")%>%
    select(strain, chromosome, coverage_percent)%>%
    pivot_wider(names_from = chromosome, values_from = coverage_percent)%>%
    column_to_rownames("strain") 

del_coverage <- chrom_metrics %>%
    filter(cnv == "deletion")%>%
    select(strain, chromosome, coverage_percent)%>%
    pivot_wider(names_from = chromosome, values_from = coverage_percent)%>%
    column_to_rownames("strain") 

Make matrix of category per chromosome for duplications and deletions

Code
dup_category <- chrom_metrics %>%
    filter(cnv == "duplication")%>%
    select(strain, chromosome, category)%>%
    pivot_wider(names_from = chromosome, values_from = category)%>%
    column_to_rownames("strain") 

del_category <- chrom_metrics %>%
    filter(cnv == "deletion")%>%
    select(strain, chromosome, category)%>%
    pivot_wider(names_from = chromosome, values_from = category)%>%
    column_to_rownames("strain") 

Full tree plots

Tree

Code
tree <- read.tree(merged_tree_path)

Remove tips that are not in metadata$strain

Code
tree <- drop.tip(tree, setdiff(tree$tip.label, metadata$strain))

Lineage nodes

Get the node number of the Most Recent Common Ancestor of each lineage

Code
VNI_node <- getMRCA(tree, c("Tu241-1","UI_31647-2"))
VNII_node <- getMRCA(tree, c("C2","C12"))
VNBI_node <- getMRCA(tree, c("Tu229-1","Ftc267-2"))
VNBII_node <- getMRCA(tree, c("MW-RSA3321","MW-RSA3179"))

VNIa4_node <- getMRCA(tree, c("04CN-30-008","UI_31647-2"))
VNIa5_node <- getMRCA(tree, c("BMD852","14936_1#45"))
VNIa93_node <- getMRCA(tree, c("04CN-65-080","04CN-65-002"))
VNIa32_node <- getMRCA(tree, c("BMD942","BMD2801"))
VNIaX_node <- getMRCA(tree, c("Bt48","04CN-63-007"))
VNIaY_node <- getMRCA(tree, c("04CN-65-073","Bt138"))

VNIa_node <- getMRCA(tree, c("04CN-30-008","BMD852"))
VNIb_node <- getMRCA(tree, c("04CN-65-096","MW-RSA722"))
VNIc_node <- getMRCA(tree, c("Bt20","Bt11"))
Code
nodes_lineages <- data.frame(
    lineage = c("VNI", "VNII", "VNBI", "VNBII"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node))

nodes_sublineages <- data.frame(
    sublineage = c("VNII", "VNBII", "VNBI", "VNIb","VNIc", "VNIa"),
    mrca = c(VNII_node, VNBII_node, VNBI_node, VNIb_node, VNIc_node, VNIa_node),
    shading = c("gray30", "gray60","gray30", "gray60","gray30", "gray60"))

nodes_vnisublineages <- data.frame(
    sublineage = c("VNIb","VNIc", "VNIa"),
    mrca = c(VNIb_node, VNIc_node, VNIa_node),
    shading = c("gray90", "gray70","gray90"))

nodes_vniasublineages <- data.frame(
    sublineage = c("VNIb", "VNIc", "VNIa-4", "VNIa-32", "VNIa-93", "VNIa-5"),
    mrca = c(VNIb_node, VNIc_node, VNIa4_node, VNIa32_node, VNIa93_node, VNIa5_node),
    shading = c("gray70", "gray90","gray70", "gray90","gray70", "gray90"))
Code
sublineage_shading <- nodes_sublineages$shading
names(sublineage_shading) <- nodes_sublineages$sublineage

vnisublineage_shading <- nodes_vnisublineages$shading
names(vnisublineage_shading) <- nodes_vnisublineages$sublineage

vniasublineage_shading <- nodes_vniasublineages$shading
names(vniasublineage_shading) <- nodes_vniasublineages$sublineage

Color palette

Code
chrom_colors <- brewer.pal(7, "Dark2")
names(chrom_colors) <- c("chr01", "chr04",
                         "chr06", "chr09", 
                         "chr12","chr13", "chr14")
chrom_dup_colors <- c(chrom_colors, "Euploid" = "grey93")

countries_final <- levels(droplevels(country[rownames(country) %in% tree$tip.label, ]))

Country, source, duplications in one ring per chromosome

Code
a <- ggtree(tree, 
        ladderize = TRUE,
        layout = "circular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_tiplab(color = "black", size = 0.3, offset = 0.01)+
    geom_hilight(data=nodes_vnisublineages, 
        aes(node=mrca, fill=sublineage), alpha = 0.8)+
        scale_fill_manual(name = "Sublineage", values = vnisublineage_shading)+
    guides(fill = FALSE)+
    new_scale_fill()+
    geom_tree(size = 0.1)+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    geom_tippoint(aes(color = country_of_origin), shape = 18,
                size = 0.01)+
    scale_color_manual(name = "Country", values = country_colors,
                        limits = countries_final)+
    guides(color = guide_legend(override.aes = list(size = 5), order = 1, ncol = 2))

a1 <- gheatmap(a, source, width=.05, colnames=FALSE, offset=3.7) +
        scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
        guides(fill = guide_legend(order = 2))+
        new_scale_fill()
a2 <- gheatmap(a1, dup_chroms, width=.32, colnames = FALSE, offset=6) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 5))+
        geom_cladelab(data = nodes_vnisublineages, 
                mapping = aes(node = mrca, label = sublineage),
                align = TRUE, face = "bold",
                fontsize = 3,
                angle = "auto", offset = 20)+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
a2

Code
ggsave(tree_merged_duplications_path1, a2, height = 8, width = 6.5, units = "in", dpi = 1000)

Country, source, duplications in one ring per chromosome, lineage and sublineage labels

Code
m <- ggtree(tree, 
        ladderize = TRUE,
        layout = "circular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_tree(size = 0.1)+
    geom_text(aes(label = nodes_sublineages$sublineage[match(node, nodes_sublineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)

m1 <- gheatmap(m, continent, width=.05, colnames=FALSE) +
        scale_fill_manual(values = continent_colors, name="Continent",
            na.translate = FALSE, limits = names(continent_colors))+
        guides(fill = guide_legend(order = 1, ncol = 2))+
        new_scale_fill()

m2 <- gheatmap(m1, source, width=.05, colnames=FALSE, offset=2.3) +
        scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
        guides(fill = guide_legend(order = 2))+
        new_scale_fill()
m3 <- gheatmap(m2, dup_chroms, width=.32, colnames = FALSE, offset=4.7) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 3, ncol = 2))+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_path2, m3, height = 8, width = 6.5, units = "in", dpi = 1000)

Country, source, duplications in one ring per chromosome, lineage and sublineage labels, tiplabels

Code
m <- ggtree(tree, 
        ladderize = TRUE,
        layout = "circular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_tree(size = 0.1)+
    geom_text(aes(label = nodes_sublineages$sublineage[match(node, nodes_sublineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    geom_tiplab(color = "black", size = 0.3, offset = 0.01)

m1 <- gheatmap(m, continent, width=.05, colnames=FALSE,offset=3.7 ) +
        scale_fill_manual(values = continent_colors, name="Continent",
            na.translate = FALSE, limits = names(continent_colors))+
        guides(fill = guide_legend(order = 1, ncol = 2))+
        new_scale_fill()

m2 <- gheatmap(m1, source, width=.05, colnames=FALSE, offset=6) +
        scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
        guides(fill = guide_legend(order = 2))+
        new_scale_fill()
m3 <- gheatmap(m2, dup_chroms, width=.32, colnames = FALSE, offset=8) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 3, ncol = 2))+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_path3, m3, height = 8, width = 6.5, units = "in", dpi = 1000)

Country, source, lineage and sublineage labels, tiplabs, coverage percent of duplications

Code
m3 <- gheatmap(m2, dup_coverage, width=.8, colnames = FALSE, offset=8) +
    scale_fill_viridis_c(name = "Dup Coverage", direction = -1, na.value = "white")+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_path3, m3, height = 8, width = 6.5, units = "in", dpi = 1000)

Country, source, lineage and sublineage labels, tiplabs, category of duplications

Code
category_colors <-c("#F7FBFF", "#DEEBF7","#2171B5", "#8E0152")
names(category_colors) <- levels(chrom_metrics$category)
Code
m3 <- gheatmap(m2, dup_category, width=.8, colnames = FALSE, offset=8) +
    scale_fill_manual(name = "Category",values = category_colors,
         na.value = "white", limits = levels(chrom_metrics$category))+
    # scale_fill_brewer(name = "Category",palette = "PuBuGn",
    #         na.value = "white", limits = levels(chrom_metrics$category))+
    guides(fill = guide_legend(order = 3, ncol = 2))+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_path4, m3, height = 8, width = 6.5, units = "in", dpi = 1000)

Country, source and duplications in one ring

Code
aneuploid <- duplications_full %>%
    group_by(strain)%>%
    summarise(chromosome = str_c(chromosome, collapse="_")) %>%
    right_join(select(metadata, strain), by = "strain")%>%
    mutate(chromosome = ifelse(is.na(chromosome), "Euploid", chromosome))%>%
    column_to_rownames("strain")
Code
dup_colors <- brewer.pal(8, "Dark2")
names(dup_colors) <- c("chr01", "chr04_chr13",
                         "chr06", "chr09", 
                         "chr12","chr13", "chr14",
                         "chr14_chr13")
dup_colors <- c(dup_colors, "Euploid" = "grey93")
Code
md <- gheatmap(a1, aneuploid, width=.05, colnames = FALSE, offset=5.8) +
    scale_fill_manual(name = "Duplicated\nchromosomes", 
                        values = dup_colors)+
    guides(fill = guide_legend(order = 3))+
    geom_cladelab(data = nodes_vnisublineages, 
            mapping = aes(node = mrca, label = sublineage),
            align = TRUE, face = "bold",
            fontsize = 3,
            angle = "auto", offset = 9)+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
md

Code
ggsave(tree_merged_duplications_path5, md, height = 8, width = 6.5, units = "in", dpi = 1000)

Dataset, country, source, duplications in one ring per chromosome no tiplab

Code
m <- ggtree(tree, 
        ladderize = TRUE,
        layout = "circular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_hilight(data=nodes_sublineages, 
        aes(node=mrca, fill=sublineage), alpha = 0.3)+
        scale_fill_manual(name = "Sublineage", values = sublineage_shading)+
    guides(fill = FALSE)+
    new_scale_fill()+
    geom_tree(size = 0.1)

m1 <- gheatmap(m, continent, width=.05, colnames=FALSE) +
        scale_fill_manual(values = continent_colors, name="Continent",
            na.translate = FALSE, limits = names(continent_colors))+
        guides(fill = guide_legend(order = 1, ncol = 2))+
        new_scale_fill()

m2 <- gheatmap(m1, source, width=.05, colnames=FALSE, offset=2.3) +
        scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
        guides(fill = guide_legend(order = 2))+
        new_scale_fill()
m3 <- gheatmap(m2, dup_chroms, width=.32, colnames = FALSE, offset=4.7) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 3, ncol = 2))+
    geom_cladelab(data = nodes_sublineages, 
                mapping = aes(node = mrca, label = sublineage, color = sublineage),
                align = TRUE, face = "bold",
                fontsize = 3,
                angle = "auto", offset = 18)+
    scale_color_manual(name = "Sublineage", values = sublineage_shading)+
    guides(color = FALSE)+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_path6, m3, height = 8, width = 6.5, units = "in", dpi = 1000)

Small tree plots of partial and full duplications

Code
keep_strains <- chrom_metrics %>%
    filter(cnv == "duplication", category %in% c("partial", "full"))%>%
    pull(strain)
keep_strains <- c(keep_strains, "H99", "Bt22", "Bt89")
tree_dups <- drop.tip(tree, setdiff(tree$tip.label, keep_strains))
sublineage <- sublineage %>%
                filter(rownames(.) %in% keep_strains)%>%
                droplevels()

Lineage nodes

Get the node number of the Most Recent Common Ancestor of each lineage

Code
VNI_node <- getMRCA(tree_dups, c("Bt139","H99"))
VNII_node <- getMRCA(tree_dups, c("8-1","C12"))
VNBI_node <- getMRCA(tree_dups, c("Bt22","NRHc5045.ENR.CLIN.ISO"))
VNBII_node <- getMRCA(tree_dups, c("Bt109","Bt89"))

VNIa4_node <- getMRCA(tree_dups, c("20427_3#26","20427_4#13"))
VNIa5_node <- getMRCA(tree_dups, c("Bt139","Bt141"))
VNIa93_node <- getMRCA(tree_dups, c("04CN-64-024","04CN-64-011"))
VNIa32_node <- getMRCA(tree_dups, c("04CN-65-072","In2632"))

VNIa_node <- getMRCA(tree_dups, c("20427_3#26", "Bt139"))
VNIb_node <- getMRCA(tree_dups, c("H99","MW-RSA6134"))
VNIc_node <- getMRCA(tree_dups, c("LP-RSA3042","PMHc1031A.ENR.INI.LP"))
Code
nodes_lineages <- data.frame(
    lineage = c("VNI", "VNII", "VNBI", "VNBII"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node))

nodes_sublineages <- data.frame(
    sublineage = c("VNII", "VNBII", "VNBI", "VNIb","VNIc", "VNIa"),
    mrca = c(VNII_node, VNBII_node, VNBI_node, VNIb_node, VNIc_node, VNIa_node),
    shading = c("gray70", "gray90","gray70", "gray90","gray70", "gray90"))

nodes_vnisublineages <- data.frame(
    sublineage = c("VNIb","VNIc", "VNIa"),
    mrca = c(VNIb_node, VNIc_node, VNIa_node),
    shading = c("gray90", "gray70","gray90"))

nodes_vniasublineages <- data.frame(
    sublineage = c("VNIb", "VNIc", "VNIa-4", "VNIa-32", "VNIa-93", "VNIa-5"),
    mrca = c(VNIb_node, VNIc_node, VNIa4_node, VNIa32_node, VNIa93_node, VNIa5_node),
    shading = c("gray70", "gray90","gray70", "gray90","gray70", "gray90"))
Code
sublineage_shading <- nodes_sublineages$shading
names(sublineage_shading) <- nodes_sublineages$sublineage

vnisublineage_shading <- nodes_vnisublineages$shading
names(vnisublineage_shading) <- nodes_vnisublineages$sublineage

vniasublineage_shading <- nodes_vniasublineages$shading
names(vniasublineage_shading) <- nodes_vniasublineages$sublineage
Code
countries_final <- levels(droplevels(country[rownames(country) %in% tree_dups$tip.label, ]))

Dataset, country, duplications

Code
m <- ggtree(tree_dups, 
        ladderize = TRUE,
        layout = "rectangular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_tiplab(color = "black", size = 1.5, offset = 0.1)+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    geom_hilight(data=nodes_vnisublineages, 
        aes(node=mrca, fill=sublineage), alpha = 0.8)+
        scale_fill_manual(name = "Sublineage", values = vnisublineage_shading)+
    guides(fill = FALSE)+
    new_scale_fill()+
    geom_tree(size = 0.1)+
    geom_tippoint(aes(color = dataset), shape = 18,
                size = 1.5)+
    scale_color_manual(name = "Dataset", values = dataset_colors)+
    guides(color = guide_legend(override.aes = list(size = 5), order = 1))

m1 <- gheatmap(m, country, width=.03, colnames=FALSE, offset=2.3) +
        scale_fill_manual(values = country_colors, name="Country",
            na.translate = FALSE, limits = countries_final)+
        guides(fill = guide_legend(order = 2, ncol = 2))+
        new_scale_fill()
Code
ggsave(tree_merged_duplications_small1, m1, height = 5, width = 6.5, units = "in", dpi = 900)
Code
m3 <- gheatmap(m1, dup_category, width=.25, colnames = TRUE,
                colnames_position = "top",
                colnames_angle = 45,
                colnames_offset_y = 0.5,
                font.size = 1.5, offset=2.8) +
    ylim(0,80)+
    scale_fill_manual(name = "Category",values = category_colors,
         na.value = "white", limits = levels(chrom_metrics$category))+
    guides(fill = guide_legend(order = 5, ncol = 2))+
    geom_cladelab(data = nodes_vnisublineages, 
                mapping = aes(node = mrca, label = sublineage),
                align = TRUE, face = "bold",
                fontsize = 3,
                angle = 0, offset = 7)+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_small2, m3, height = 5, width = 6.5, units = "in", dpi = 900)
Code
m <- ggtree(tree_dups, 
        ladderize = TRUE,
        layout = "rectangular", 
        branch.length = "none",
        size = 0.2) %<+%  metadata +
    geom_tiplab(color = "black", size = 1.5, offset = 0.01)+
    geom_text(aes(label = nodes_sublineages$sublineage[match(node, nodes_sublineages$mrca)]), 
                        size = 2, fontface = "bold",
                        hjust = 1.1, vjust = -0.5)
Code
m3 <- gheatmap(m, dup_category, width=.25, colnames = TRUE,
                colnames_position = "top",
                colnames_angle = 90,
                colnames_offset_y = 1,
                font.size = 1.5, offset=2.3) +
    ylim(0,70)+
    scale_fill_manual(name = "Category",values = category_colors,
         na.value = "white", limits = levels(chrom_metrics$category))+
    guides(fill = guide_legend(order = 5, ncol = 2))+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_small3, m3, height = 5, width = 6.5, units = "in", dpi = 900)
Code
m3 <- gheatmap(m, dup_coverage, width=.25, colnames = TRUE,
                colnames_position = "top",
                colnames_angle = 90,
                colnames_offset_y = 1,
                font.size = 1.5, offset=2.3) +
    ylim(0,70)+
    scale_fill_viridis_c(name = "Dup Coverage", direction = -1, na.value = "white")+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_small4, m3, height = 5, width = 6.5, units = "in", dpi = 900)

Small tree plots of full duplications

Code
keep_strains <- c(levels(duplications_full$strain), "H99", "Bt22", "Bt89")
tree_dups <- drop.tip(tree, setdiff(tree$tip.label, keep_strains))
sublineage <- sublineage %>%
                filter(rownames(.) %in% keep_strains)%>%
                droplevels()

Lineage nodes

Get the node number of the Most Recent Common Ancestor of each lineage

Code
VNI_node <- getMRCA(tree_dups, c("Bt139","H99"))
VNII_node <- getMRCA(tree_dups, c("8-1","C12"))
VNBI_node <- getMRCA(tree_dups, c("Bt22","NRHc5045.ENR.CLIN.ISO"))
VNBII_node <- getMRCA(tree_dups, c("Bt109","Bt89"))

VNIa4_node <- getMRCA(tree_dups, c("20427_3#26","20427_4#13"))
VNIa5_node <- getMRCA(tree_dups, c("Bt139","Bt141"))
VNIa93_node <- getMRCA(tree_dups, c("04CN-64-024","04CN-64-011"))
VNIa32_node <- getMRCA(tree_dups, c("04CN-65-072","In2632"))

VNIa_node <- getMRCA(tree_dups, c("20427_3#26", "Bt139"))
VNIb_node <- getMRCA(tree_dups, c("H99","MW-RSA6134"))
VNIc_node <- getMRCA(tree_dups, c("LP-RSA3042","PMHc1031A.ENR.INI.LP"))
Code
nodes_lineages <- data.frame(
    lineage = c("VNI", "VNII", "VNBI", "VNBII"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node))

nodes_sublineages <- data.frame(
    sublineage = c("VNII", "VNBII", "VNBI", "VNIb","VNIc", "VNIa"),
    mrca = c(VNII_node, VNBII_node, VNBI_node, VNIb_node, VNIc_node, VNIa_node),
    shading = c("gray70", "gray90","gray70", "gray90","gray70", "gray90"))

nodes_vnisublineages <- data.frame(
    sublineage = c("VNIb","VNIc", "VNIa"),
    mrca = c(VNIb_node, VNIc_node, VNIa_node),
    shading = c("gray90", "gray70","gray90"))

nodes_vniasublineages <- data.frame(
    sublineage = c("VNIb", "VNIc", "VNIa-4", "VNIa-32", "VNIa-93", "VNIa-5"),
    mrca = c(VNIb_node, VNIc_node, VNIa4_node, VNIa32_node, VNIa93_node, VNIa5_node),
    shading = c("gray70", "gray90","gray70", "gray90","gray70", "gray90"))
Code
sublineage_shading <- nodes_sublineages$shading
names(sublineage_shading) <- nodes_sublineages$sublineage

vnisublineage_shading <- nodes_vnisublineages$shading
names(vnisublineage_shading) <- nodes_vnisublineages$sublineage

vniasublineage_shading <- nodes_vniasublineages$shading
names(vniasublineage_shading) <- nodes_vniasublineages$sublineage
Code
countries_final <- levels(droplevels(country[rownames(country) %in% tree_dups$tip.label, ]))

Dataset, country, duplications

Code
m <- ggtree(tree_dups, 
        ladderize = TRUE,
        layout = "rectangular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_tiplab(color = "black", size = 1.5, offset = 0.1)+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    geom_hilight(data=nodes_vnisublineages, 
        aes(node=mrca, fill=sublineage), alpha = 0.8)+
        scale_fill_manual(name = "Sublineage", values = vnisublineage_shading)+
    guides(fill = FALSE)+
    new_scale_fill()+
    geom_tree(size = 0.1)+
    geom_tippoint(aes(color = dataset), shape = 18,
                size = 1.5)+
    scale_color_manual(name = "Dataset", values = dataset_colors)+
    guides(color = guide_legend(override.aes = list(size = 5), order = 1))

m1 <- gheatmap(m, country, width=.03, colnames=FALSE, offset=2.3) +
        scale_fill_manual(values = country_colors, name="Country",
            na.translate = FALSE, limits = countries_final)+
        guides(fill = guide_legend(order = 2, ncol = 2))+
        new_scale_fill()

m3 <- gheatmap(m1, dup_chroms, width=.25, colnames = FALSE, offset=2.8) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 5, ncol = 2))+
        geom_cladelab(data = nodes_vnisublineages, 
                mapping = aes(node = mrca, label = sublineage),
                align = TRUE, face = "bold",
                fontsize = 3,
                angle = 0, offset = 5.8)+
    theme(legend.position = "bottom",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_small5, m3, height = 5, width = 6.5, units = "in", dpi = 900)

Lineage-sublineage, duplications

Code
m <- ggtree(tree_dups, 
        ladderize = TRUE,
        layout = "rectangular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_text(aes(label = nodes_sublineages$sublineage[match(node, nodes_sublineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)

m3 <- gheatmap(m, dup_chroms, width=.25, 
                colnames = TRUE, colnames_position = "top",
                colnames_angle = 45,
                colnames_offset_y = 0.5,
                font.size = 2,
                offset=0.1) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 5))+
    theme(legend.position = "right",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_small6, m3, height = 5, width = 6.5, units = "in", dpi = 900)

Lineage-sublineage, duplications (one colors)

Code
chrom_colors <- rep("deepskyblue4",7)
names(chrom_colors) <- c("chr01", "chr04",
                         "chr06", "chr09", 
                         "chr12","chr13", "chr14")

chrom_dup_colors <- c(chrom_colors, "Euploid" = "grey93")
Strain labels
Code
m <- ggtree(tree_dups, 
        ladderize = TRUE,
        layout = "rectangular", 
        branch.length = "none",
        size = 0.2) %<+%  metadata +
    geom_tiplab(color = "black", size = 2, offset = 0.01)+
    geom_text(aes(label = nodes_sublineages$sublineage[match(node, nodes_sublineages$mrca)]), 
                        size = 2, fontface = "bold",
                        hjust = 1.1, vjust = -0.5)

m3 <- gheatmap(m, dup_chroms, width=.5, 
                colnames = TRUE, colnames_position = "top",
                colnames_angle = 0,
                colnames_offset_y = 0.5,
                font.size = 2,
                offset=3) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 5))+
    theme(legend.position = "none",
    legend.direction = "vertical")
m3

Code
ggsave(tree_merged_duplications_small7, m3, height = 4.5, width = 6.5, units = "in", dpi = 900)
No strain labels
Code
# 1. Create a dummy matrix with NA values
dummy <- dup_chroms
dummy[,] <- NA

# 2. Set colnames to your n values (make sure order matches)
chrom_order <- colnames(dup_chroms)
n_labels <- n_dups$n[match(chrom_order, n_dups$chromosome)]
unique_labels <- paste0(chrom_order, "\n", n_labels)
colnames(dummy) <- unique_labels
Code
m <- ggtree(tree_dups, 
        ladderize = TRUE,
        layout = "rectangular", 
        branch.length = "none",
        size = 0.2) %<+%  metadata +
    geom_text(aes(label = nodes_sublineages$sublineage[match(node, nodes_sublineages$mrca)]), 
                        size = 2.4, fontface = "bold",
                        hjust = 1.1, vjust = -0.5)+
    ylim(0,41)

m2 <-   gheatmap(
    p = m, 
    data = dummy, 
    width = 0.6, 
    colnames = TRUE, 
    colnames_position = "bottom", 
    colnames_angle = 0, 
    colnames_offset_y = 0.5, 
    font.size = 2.5, 
    offset = 0,
    color = NA
  ) +
  scale_fill_manual(values = c("transparent"), na.value = "transparent", guide = "none")

m3 <- gheatmap(m2, dup_chroms, width=.6, 
                colnames = TRUE, colnames_position = "top",
                colnames_angle = 0,
                colnames_offset_y = 0.5,
                font.size = 2.5,
                offset=0) +
    scale_fill_manual(values = chrom_dup_colors, 
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )+
    guides(fill = guide_legend(order = 5))+
    theme(legend.position = "none",
    legend.direction = "vertical")

m3

Code
ggsave(tree_merged_duplications_small8, m3, height = 4.5, width = 6.5, units = "in", dpi = 900)

Lineage-sublineage, duplications in one bar

Code
m3<- gheatmap(m, aneuploid, width=.05, colnames = FALSE, offset=0.5) +
        scale_fill_brewer(palette = "Dark2",
                    name="Duplicated\nchromosomes", 
                    na.translate = FALSE )
m3

Code
ggsave(tree_merged_duplications_small9, m3, height = 5, width = 6.5, units = "in", dpi = 900)